{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入必要的包\n",
    "import time\n",
    "\n",
    "from selenium import webdriver\n",
    "from selenium.common.exceptions import WebDriverException\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.edge.options import Options as EdgeOptions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up Selenium. Note: the driver binary version must match the version of the\n",
    "# browser in use, otherwise an error is raised.\n",
    "\n",
    "# Edge browser options (optional switches kept below for reference).\n",
    "options = EdgeOptions()\n",
    "# options.use_chromium = True\n",
    "# options.add_argument(\"headless\")\n",
    "# options.add_argument(\"disable-gpu\")\n",
    "\n",
    "# Start the driver service process.\n",
    "# service = Service('/usr/local/bin/msedgedriver')  # macOS setup\n",
    "service = Service(executable_path=r'C:\\cmder\\msedgedriver.exe')  # Windows setup\n",
    "service.start()\n",
    "\n",
    "# Open a browser session against the running driver service.\n",
    "driver = webdriver.Remote(command_executor=service.service_url, options=options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the target video page in the browser session.\n",
    "url = 'https://www.douyin.com/video/6736076662053653763'\n",
    "driver.get(url)\n",
    "\n",
    "# Pause for a few seconds so the verification step can be passed.\n",
    "time.sleep(5)\n",
    "\n",
    "# Look up the element bound to total_comments by its absolute XPath.\n",
    "# Alternative XPath kept for reference (root id \"dark\" instead of \"root\"):\n",
    "# //*[@id=\"dark\"]/div[2]/div/div[1]/div[1]/div[3]/div/div/div[1]/div[2]/span\n",
    "total_comments = driver.find_element(\n",
    "    by=By.XPATH,\n",
    "    value='//*[@id=\"root\"]/div/div[2]/div/div[1]/div[1]/div[3]/div/div/div[1]/div[2]/span',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "# total = int(total_comments.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "# //*[@id=\"dark\"]/div[2]/div/div[1]/div[3]/div/div/div[4]/div[1]/div/div[2]/p/span[1]/span/span/span/span\n",
    "# //*[@id=\"dark\"]/div[2]/div/div[1]/div[3]/div/div/div[4]/div[2]/div/div[2]/p/span[1]/span/span/span/span"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "# comment1 = driver.find_element(\n",
    "#     By.XPATH,\n",
    "#     '//*[@id=\"dark\"]/div[2]/div/div[1]/div[3]/div/div/div[4]/div[1]/div/div[2]/p/span[1]/span/span/span/span'\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# comment1.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "# f = open('评论2.txt','a',encoding='utf-8')\n",
    "# text = '你不说一句“家你看撒”都不是你的感觉了'\n",
    "# # f.write(text+'\\n')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stray XPath note, commented out: as bare text it is a Python SyntaxError and\n",
    "# stops Restart & Run All at this cell.\n",
    "# //*[@id=\"wolai-header-bar\"]/div[2]/span[1]/span[2]/span/span"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "正在抓取第1条评论 学校都统一的嘛我们当年教官也是悄悄就走了\n",
      "正在抓取第2条评论 和我们学校一样，也是军训完汇报表演完，教官就偷偷走了\n",
      "正在抓取第3条评论 教官走的时候什么也没跟我们说，很难受\n",
      "正在抓取第4条评论 结束后我们追出去，教官在走的大巴车上给我竖起大拇指 我瞬间哭成狗。\n",
      "正在抓取第5条评论 为什么女生会喜欢上教官\n",
      "正在抓取第6条评论 教官说过的一句话，你们的一生我们只是过客\n",
      "正在抓取第7条评论 我记得他们说过，我们只是你们成长路上的过客，马上我们将原路返回，你们继续前行。说实话我眼眶湿了。\n",
      "正在抓取第8条评论 教官总是偷偷的走了，我一直好想他 好怀念一起的时光，知道有一天我在澡堂见到了他，我才知道他是我学长，\n",
      "正在抓取第9条评论 我带眼镜，站军姿的时候眼镜光掉，教官经常给我扶眼镜，感觉超级心动\n",
      "正在抓取第10条评论 我们教官也是，悄悄的就走了，桑心\n",
      "正在抓取第11条评论 在前几天，也就是军训的最后一天，看见他们走的背影，瞬间崩溃了\n",
      "正在抓取第12条评论 我们教官提前跟我们说他会半路跳车回来的，结果真的回来了\n",
      "正在抓取第13条评论 我们会操的时候，总教直接教官出列集合，给我们敬个礼就直接走了\n",
      "正在抓取第14条评论 教官走的时候，我一直转着头看他们离开，当时就我一个哭的特别伤心\n",
      "正在抓取第15条评论 有时候.教官就是那个突然出现在我们生命中却又悄无声息离开的人.\n",
      "正在抓取第16条评论 教官，军训结束后你们是不是也偷偷的跑掉？\n",
      "正在抓取第17条评论 我们那年是有一群学生把教官打走的。后来民大把军训停了一年。哈哈哈哈哈哈\n",
      "正在抓取第18条评论 以后还能不能再见啊 陪我们休息时间玩游戏唱歌叫我欢乐豆的教官啊 为什么连再见都不说啊 当时做最后一排如果不是听同学说真的不知道你们会这样离开\n",
      "正在抓取第19条评论 我的教官刚才就是偷偷走了\n"
     ]
    }
   ],
   "source": [
    "# Scrape up to 299 comments from the page currently open in `driver` and append\n",
    "# them to 评论.txt, one comment per line.\n",
    "# The `with` block closes the file even if the cell is interrupted; `f` stays bound\n",
    "# afterwards, so a later f.close() is a harmless no-op.\n",
    "with open('评论.txt', 'a', encoding='utf-8') as f:\n",
    "    for count in range(1, 300):\n",
    "        # Comments are addressed positionally: div[count] selects the count-th entry.\n",
    "        xpath = f'//*[@id=\"root\"]/div/div[2]/div/div[1]/div[3]/div/div/div[4]/div[{count}]/div/div[2]/p/span/span/span/span/span'\n",
    "        try:\n",
    "            comment = driver.find_element(By.XPATH, xpath)\n",
    "            content = comment.text\n",
    "            images = comment.find_elements(By.TAG_NAME, 'img')\n",
    "            time.sleep(1)  # pause one second per comment\n",
    "            # Append the alt text of every <img> inside the comment (presumably inline\n",
    "            # emoji - confirm); `or ''` guards against a missing alt property.\n",
    "            for image in images:\n",
    "                content += image.get_property('alt') or ''\n",
    "            # Log after the alt text is appended so the output matches the written line.\n",
    "            print(f'正在抓取第{count}条评论', content)\n",
    "            f.write(content + '\\n')\n",
    "            # Scroll down, presumably to trigger loading of further comments.\n",
    "            driver.execute_script('window.scrollBy(0,250)')\n",
    "        except WebDriverException:\n",
    "            # Best effort: skip entries that are missing or fail inside Selenium, but let\n",
    "            # KeyboardInterrupt, file errors and programming errors propagate.\n",
    "            continue\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tear down in order; each step runs even if the previous one raises.\n",
    "try:\n",
    "    driver.quit()  # end the remote browser session\n",
    "finally:\n",
    "    try:\n",
    "        # service.start() was called manually and the driver is a webdriver.Remote,\n",
    "        # so the driver process has to be stopped explicitly.\n",
    "        service.stop()\n",
    "    finally:\n",
    "        f.close()  # no-op if the file is already closed"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "11f1dc213e07634baa4c5c321dec03c05dafae643c50f20e6d1a492290c05dc2"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
